*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*	Setting up the analysis file
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

	//student covariates; each one also gets a lagged bl_ copy in the baseline loop
	local covs asian black hispanic white
	local covs `covs' female sped f_lunch r_lunch fr_lunch esl

	//test-score outcomes, given as wildcard patterns
	local outcomes math* ela* total*

	//school sector indicators (missing values are recoded to 0 further down)
	local sectors charter innovation alternative magnet contract traditional
	local sectors `sectors' cmo non_cmo kipp dsst strive

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

***	build

*	load enrollment

	use "${cleandata}enrollment_long", clear

*	merge tests

	//attach test scores to the enrollment rows
	//keep(1 3): rows that exist only in the tests file are not kept
	//the outcome patterns come from the outcomes local so this merge, the loop
	//below and the later keep all refer to the same set of variables
	merge 1:1 stu year using "${cleandata}tests", gen(test_merge) keep(1 3) ///
		keepusing(test_grade `outcomes')

	//missing if grade of test is not grade currently enrolled
	foreach var of varlist `outcomes' {
		replace `var' = . if test_grade != grade
	}

	//blank grade-9 ela and total for 2015-2017
	//do this to match math, which is tracked and we therefore don't use
	replace ela   = . if inrange(year,2015,2017) & grade == "9"
	replace total = . if inrange(year,2015,2017) & grade == "9"
	drop test_grade

**	merge in pscores

*	clean

	//get baseline vars: bl_x is the previous year's value of x within student
	//(the lags are taken before the EC and no-school rows are dropped below)
	xtset stu year
	foreach x in math ela total sch `covs' `sectors' {
		qui gen bl_`x' = L.`x'
		label var bl_`x' "Baseline `x'"
	}

	//drop EC
	//trim() so that "EC" with any leading/trailing blanks is caught, not only "EC "
	drop if trim(grade) == "EC"
	destring grade, replace

	//drop if no school
	//missing() also catches extended missing values (.a-.z), which sch==. would keep
	drop if missing(sch)

*	merge

	//pscores is a student by year by school-app file, so one enrollment row
	//can match several application rows (hence 1:m)
	//app_grade is a temporary copy of grade, used only as a merge key
	generate app_grade = grade
	merge 1:m stu year app_grade using "${cleandata}pscores.dta", ///
		keep(master match) generate(pscore_merge)
	drop app_grade

	//only students with enrollment info remain (using-only rows were not kept)
	//file is now student by year by school-app

*** clean

	//sector indicators: recode every missing value to 0
	mvencode `sectors', mv(0) override

*	school codes

	//differentiate sch codes by grade range: prepend a digit to the code
	//  1 -> grades 0-5,  6 -> grades 6-8,  9 -> grades 9-12
	//done on string versions of the codes, then converted back to numeric
	//app_sch is only prefixed on rows that matched an application (pscore_merge == 3)
	tostring sch app_sch, replace
	foreach band in "1 0 5" "6 6 8" "9 9 12" {
		local digit   : word 1 of `band'
		local g_first : word 2 of `band'
		local g_last  : word 3 of `band'

		replace sch     = "`digit'" + sch     if inrange(grade, `g_first', `g_last')
		replace app_sch = "`digit'" + app_sch if inrange(grade, `g_first', `g_last') & pscore_merge == 3
	}
	destring sch app_sch, replace

*	flags

	//flag kids with pscores (participating in school choice)
	gen choice_flag = pscore_merge == 3

	//flag kids with math score
	//missing() rather than != . so extended missing values (.a-.z) are not counted as scores
	gen math_flag = !missing(math)

*	drop extra vars

	//note: the math* pattern in the outcomes local also matches math_flag,
	//and pscore_* also matches pscore_merge, so both are kept
	keep stu sch schname grade year year_app `covs' `outcomes' `sectors' ///
		bl_* app_* offer pscore_* *flag

***	reshape to wide (collapse schoolapps within student by year)
	//(made slightly tricky by non-choice sample students)

	local lottovars pscore_form pscore_freq offer
	tempfile wide_pscores

	//flatten pscores for choice sample: rows with an application are reshaped to
	//one row per stu/year_app with one set of lottery columns per app_sch code
	preserve
		keep if app_sch != .
		keep stu app_sch year_app `lottovars'

		reshape wide `lottovars', i(stu year_app) j(app_sch)

		//put an underscore between each stub and the school code appended by reshape
		foreach stub of local lottovars {
			rename `stub'* `stub'_*
		}

		save "`wide_pscores'"
	restore

	//drop vars which differ within student/year (which will be merged back in in flattened form)
	drop app_* `lottovars'

	//flatten data to be unique on student/year
	duplicates drop

	//merge in flattened pscores (is 1:1 for choice sample)
	//assert: every row of the flattened file must find a match in the main data
	merge m:1 stu year_app using "`wide_pscores'", assert(master match) generate(wide_merge)

	//replace missing lottery vars with zeros
	quietly mvencode pscore_freq* pscore_form* *offer*, mv(0) override

***	enrollment

*	school-level

	//one 0/1 indicator per distinct value of sch:
	//enr_<code> is 1 on rows where sch equals that code, 0 otherwise
	sort stu year
	quietly levelsof sch, local(allschools)
	foreach code of local allschools {
		generate byte enr_`code' = cond(sch == `code', 1, 0)
	}

***	save

	//stop with an error unless the file is unique on student/year
	isid stu year

	quietly compress
	save "${cleandata}DEN_basefile", replace
